Hot and cold days#

https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/GHCND_documentation.pdf

import warnings
warnings.filterwarnings("ignore")
import os
import os.path as op
import sys

import numpy as np
import pandas as pd
from datetime import datetime

sys.path.append("../../../../indicators_setup")
from ind_setup.plotting_int import plot_timeseries_interactive
from ind_setup.colors import get_df_col

sys.path.append("../../../functions")
from data_downloaders import GHCN
from temp_func import exceedance_rate_for_base_period, exceedance_rate_for_outbase_period

Define location and variables of interest#

country = 'Palau'
vars_interest = ['TMIN', 'TMAX']

Get Data#

update_data = False
path_data = "../../../data"

Using Koror Station#

if update_data:
    df_country = GHCN.get_country_code(country)
    print(f'The GHCN code for {country} is {df_country["Code"].values[0]}')

    df_stations = GHCN.download_stations_info()
    df_country_stations = df_stations[df_stations['ID'].str.startswith(df_country.Code.values[0])]
    print(f'There are {df_country_stations.shape[0]} stations in {country}')
if update_data:
    GHCND_dir = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/'
    id = 'PSW00040309' # Koror Station
    dict_min = GHCN.extract_dict_data_var(GHCND_dir, 'TMIN', df_country_stations.loc[df_country_stations['ID'] == id])[0][0]
    dict_max = GHCN.extract_dict_data_var(GHCND_dir, 'TMAX', df_country_stations.loc[df_country_stations['ID'] == id])[0][0]
    st_data = pd.concat([dict_min['data'], (dict_max['data'])], axis=1).dropna()
    st_data['DATE'] = st_data.index
    st_data['DAY'] = "2024-" + st_data['DATE'].dt.strftime('%m-%d')
    st_data['DAY'] = pd.to_datetime(st_data['DAY'], format='%Y-%m-%d')
    st_data.index = range(len(st_data))
    st_data.to_pickle(op.join(path_data, 'GHCN_surface_temperature_hotdays.pkl'))
else:
    st_data = pd.read_pickle(op.join(path_data, 'GHCN_surface_temperature_hotdays.pkl'))
from datetime import datetime
import pandas as pd
import numpy as np

from datetime import datetime
import pandas as pd
import numpy as np

def exceedance_rate_for_base_period(climate_data, variable_name):
    exceedance_rates = {}
    all_exceedance_data = {}

    base_period_years = range(1961, 1991)
    base_period_data = climate_data[climate_data['DATE'].dt.year.isin(base_period_years)]

    # Precompute thresholds for all days in the base period
    thresholds = {}
    for day in base_period_data['DAY'].unique():
        thresholds[day] = centered_percentile(day, base_period_data, variable_name)

    for out_of_base_year in base_period_years:
        out_of_base_data = climate_data[climate_data['DATE'].dt.year == out_of_base_year].copy()
        out_of_base_data['THRESHOLD'] = out_of_base_data['DAY'].map(thresholds)

        if variable_name == "TMAX":
            exceedance_rate = (out_of_base_data[variable_name] > out_of_base_data['THRESHOLD']).mean()
        elif variable_name == "TMIN":
            exceedance_rate = (out_of_base_data[variable_name] < out_of_base_data['THRESHOLD']).mean()

        exceedance_rates[out_of_base_year] = exceedance_rate
        all_exceedance_data[out_of_base_year] = {out_of_base_year: exceedance_rate}

    return exceedance_rates, all_exceedance_data

def centered_percentile(date, base_df, variable_name):
    filtered_df = base_df[(base_df["DATE"] >= datetime(1960, 12, 29)) & (base_df["DATE"] <= datetime(1991, 1, 2))]
    window_values = []

    for x in filtered_df[filtered_df['DAY'] == date]['DATE']:
        window_values.extend(filtered_df[(filtered_df['DATE'] >= x - pd.Timedelta(days=2)) &
                                         (filtered_df['DATE'] <= x + pd.Timedelta(days=2))][variable_name].tolist())

    if variable_name == "TMAX":
        return np.percentile(window_values, 90)
    elif variable_name == "TMIN":
        return np.percentile(window_values, 10)

def exceedance_rate_for_outbase_period(climate_data, variable_name):
    date_range = pd.date_range('2024-01-01', '2024-12-31', freq='D')
    df_exceedance = pd.DataFrame({'DAY': date_range})

    df_exceedance['THRESHOLD'] = df_exceedance['DAY'].apply(lambda day_value: centered_percentile(day_value, climate_data, variable_name))

    return df_exceedance

def centered_percentile(date, base_df, variable_name):
    filtered_df = base_df[(base_df["DATE"] >= datetime(1960, 12, 29)) & (base_df["DATE"] <= datetime(1991, 1, 2))]
    window_values = []

    for x in filtered_df[filtered_df['DAY'] == date]['DATE']:
        window_values.extend(filtered_df[(filtered_df['DATE'] >= x - pd.Timedelta(days=2)) &
                                         (filtered_df['DATE'] <= x + pd.Timedelta(days=2))][variable_name].tolist())

    if variable_name == "TMAX":
        return np.percentile(window_values, 90)
    elif variable_name == "TMIN":
        return np.percentile(window_values, 10)
temp_table = {}
exceed_rates_TMAX = exceedance_rate_for_outbase_period(st_data, "TMAX")
exceed_rates_TMIN = exceedance_rate_for_outbase_period(st_data, "TMIN")
TMAX_dict = dict(zip(exceed_rates_TMAX['DAY'], exceed_rates_TMAX['THRESHOLD']))
TMIN_dict = dict(zip(exceed_rates_TMIN['DAY'], exceed_rates_TMIN['THRESHOLD']))
df_exceed = st_data.copy()
df_exceed['THRESHOLD_TMAX'] = df_exceed['DAY'].apply(lambda day_value:TMAX_dict.get(day_value))
df_exceed['HOT_DAY'] = df_exceed[['TMAX',"THRESHOLD_TMAX"]].apply(lambda x: x["TMAX"] > x["THRESHOLD_TMAX"],axis=1)

df_exceed['THRESHOLD_TMIN'] = df_exceed['DAY'].apply(lambda day_value:TMIN_dict.get(day_value))
df_exceed['COLD_NIGHT'] = df_exceed[['TMIN',"THRESHOLD_TMIN"]].apply(lambda x: x["TMIN"] < x["THRESHOLD_TMIN"],axis=1)
df_exceed['HOT_DAY'].mean()
np.float64(0.10654076990710654)
df_exceed[df_exceed["DATE"] > datetime(1990, 12, 31)]['HOT_DAY'].mean()
np.float64(0.18184143222506394)
df_exceed['YEAR'] = pd.DatetimeIndex(st_data['DATE']).year
out_of_base_hot = {}
out_of_base_cold = {}
for x in df_exceed["YEAR"].unique():
    if x > 1990:
        out_of_base_hot[x] = df_exceed[df_exceed["YEAR"] == x]['HOT_DAY'].mean()
        out_of_base_cold[x] = df_exceed[df_exceed["YEAR"] == x]['COLD_NIGHT'].mean()
ex_cold, all_cold = exceedance_rate_for_base_period(st_data, "TMIN")
ex_hot, all_hot = exceedance_rate_for_base_period(st_data, "TMAX")
all_hot = ex_hot|out_of_base_hot
all_cold = ex_cold|out_of_base_cold
cold_bar = sum(ex_cold.values()) / len(ex_cold)
hot_bar = sum(ex_hot.values()) / len(ex_hot)
hot_anom = {}

for x in all_hot:
    hot_anom[x] = 100*(all_hot[x]-hot_bar)

cold_anom = {}
for x in all_cold:
    cold_anom[x] = 100*(all_cold[x]-cold_bar)
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
df_cold_anom = pd.DataFrame.from_dict(cold_anom, orient='index', columns=['Perc_Anom'])
df_cold_anom.index = pd.to_datetime(df_cold_anom.index, format='%Y')

df_hot_anom = pd.DataFrame.from_dict(hot_anom, orient='index', columns=['Perc_Anom'])
df_hot_anom.index = pd.to_datetime(df_hot_anom.index, format='%Y')
dict_plot = [{'data' : df_cold_anom, 'var' : 'Perc_Anom', 'ax' : 1, 'label' : 'Cold Nights'},
             {'data' : df_hot_anom, 'var' : 'Perc_Anom', 'ax' : 1, 'label' : 'Hot Days'}]
fig = plot_timeseries_interactive(dict_plot, trendline=True, figsize = (25, 12))